library(tidyverse) # for data cleaning and plotting
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate) # for date manipulation
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(openintro) # for the abbr2state() function
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(gplots) # for col2hex() function
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer) # for color palettes
library(ggthemes) # for more themes (including theme_map())
library(plotly) # for the ggplotly() - basic interactivity
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gganimate) # for adding animation layers to ggplots
library(transformr) # for "tweening" (gganimate)
library(gifski) # need the library for creating gifs but don't need to load each time
library(shiny) # for creating interactive apps
library(lubridate) # for date manipulation
library(ggthemes) # for even more plotting themes
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
theme_set(theme_minimal())
AAPL <- read_csv("AAPL.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
AAPL <- AAPL%>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Technology",
Company = "AAPL") %>%
mutate(return = (Close - 75.0875)/ 75.0875 * 100)
HON <- read_csv("HON.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
HON <- HON %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Industrials",
Company = "HON") %>%
mutate(return = (Close - 180.79)/ 180.79 * 100)
UNP <- read_csv("UNP.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
UNP <- UNP %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Industrials",
Company = "UNP") %>%
mutate(return = (Close - 182.27)/ 182.27 * 100)
UPS <- read_csv("UPS.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
UPS <- UPS %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Industrials",
Company = "UPS") %>%
mutate(return = (Close - 116.79)/ 116.79 * 100)
JNJ <- read_csv("JNJ.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
JNJ <- JNJ %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Health care",
Company = "JNJ") %>%
mutate(return = (Close - 145.97)/ 145.97 * 100)
UNH <- read_csv("UNH.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
UNH <- UNH %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Health care",
Company = "UNH") %>%
mutate(return = (Close - 292.50)/ 292.50 * 100)
RHHBY<- read_csv("RHHBY.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
RHHBY <- RHHBY %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Health care",
Company = "RHHBY") %>%
mutate(return = (Close - 40.89)/ 40.89 * 100)
MSFT<- read_csv("MSFT.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
MSFT <- MSFT %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Technology",
Company = "MSFT") %>%
mutate(return = (Close - 160.62 )/ 160.62 * 100)
NVDA<- read_csv("NVDA.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
NVDA <- NVDA %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Technology",
Company = "NVDA") %>%
mutate(return = (Close - 239.91)/ 239.91 * 100)
GOOGL<- read_csv("GOOGL.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
GOOGL <- GOOGL %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Services",
Company = "GOOGL") %>%
mutate(return = (Close - 1368.68 )/ 1368.68 * 100)
TCEHY<- read_csv("TCEHY.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
TCEHY <- TCEHY %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Services",
Company = "TCEHY") %>%
mutate(return = (Close - 49.88)/ 49.88 * 100)
FB<- read_csv("FB.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
FB <- FB %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Services",
Company = "FB") %>%
mutate(return = (Close - 209.78)/ 209.78 * 100)
NEE<- read_csv("NEE.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
NEE <- NEE %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Utilities",
Company = "NEE") %>%
mutate(return = (Close - 59.6550)/ 59.6550 * 100)
ENLAY<- read_csv("ENLAY.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
ENLAY <- ENLAY %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Utilities",
Company = "ENLAY") %>%
mutate(return = (Close - 8.05)/ 8.05 * 100)
IBDSF<- read_csv("IBDSF.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
IBDSF <- IBDSF %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Utilities",
Company = "IBDSF") %>%
mutate(return = (Close - 10.23)/ 10.23 * 100)
BRKA<- read_csv("BRK-A.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
BRKA <- BRKA %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Financials",
Company = "BRKA") %>%
mutate(return = (Close - 342261)/ 342261 * 100)
JPM<- read_csv("JPM.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
JPM <- JPM %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Financials",
Company = "JPM") %>%
mutate(return = (Close - 141.09)/ 141.09 * 100)
V<- read_csv("V.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
V <- V %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Financials",
Company = "V") %>%
mutate(return = (Close - 191.12)/ 191.12 * 100)
AMZN<- read_csv("AMZN.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
AMZN <- AMZN %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Discretionary",
Company = "AMZN") %>%
mutate(return = (Close - 1898.01)/ 1898.01 * 100)
TSLA<- read_csv("TSLA.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
TSLA <- TSLA %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Discretionary",
Company = "TSLA") %>%
mutate(return = (Close - 86.052)/ 86.052 * 100)
BABA<- read_csv("BABA.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
BABA <- BABA %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Discretionary",
Company = "BABA") %>%
mutate(return = (Close - 219.77 )/ 219.77 * 100)
WMT<- read_csv("WMT.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
WMT <- WMT %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Consumer Staples",
Company = "WMT") %>%
mutate(return = (Close - 118.94)/ 118.94 * 100)
NSRGF<- read_csv("NSRGF.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
NSRGF <- NSRGF %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Consumer Staples",
Company = "NSRGF") %>%
mutate(return = (Close - 108.06 )/ 108.06 * 100)
PG<- read_csv("PG.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
PG <- PG %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Consumer Staples",
Company = "PG") %>%
mutate(return = (Close - 123.41 )/ 123.41 * 100)
SPY<- read_csv("SPY.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
SPY <- SPY %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "All field",
Company = "SPY") %>%
mutate(return = (Close - 324.87 )/ 324.87 * 100)
XOM <- read_csv("XOM.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
XOM <- XOM %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Energy",
Company= "XOM") %>%
mutate(return = (Close - 70.90)/ 70.90 * 100)
CVX <- read_csv("CVX.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
CVX <- CVX %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Energy",
Company= "CVX") %>%
mutate(return = (Close - 121.43)/ 121.43 * 100)
RDS_A <-read_csv("RDS-A.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
RDS_A <- RDS_A %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Energy",
Company= "RDS_A")%>%
mutate(return = (Close - 59.74)/ 59.74 * 100)
energy <- rbind(XOM,CVX,RDS_A)
AMT <- read_csv("AMT.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
AMT <- AMT %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Real Estate",
Company= "AMT")%>%
mutate(return = (Close - 228.50)/ 228.50 * 100)
KE <- read_csv("KE.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
KE <- KE %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Real Estate",
Company= "KE")%>%
mutate(return = (Close - 17.690)/ 17.690 * 100)
PLD <- read_csv("PLD.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
PLD <- PLD %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Real Estate",
Company= "PLD")%>%
mutate(return = (Close - 88.40 )/ 88.40 * 100)
real_estate <- rbind(AMT,KE,PLD)
BHP <-read_csv("BHP.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
BHP <- BHP%>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Material",
Company= "RDS_A") %>%
mutate(return = (Close - 54.92)/ 54.92 * 100)
LIN <-read_csv("LIN.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
LIN <- LIN %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Material",
Company= "LIN") %>%
mutate(return = (Close - 210.74)/ 210.74 * 100)
RIO <-read_csv("RIO.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
RIO <- RIO %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Material",
Company= "RIO")%>%
mutate(return = (Close - 59.89)/ 59.89 * 100)
material <- rbind(BHP,LIN,RIO)
communication_services <- rbind(GOOGL, TCEHY, FB)
utilities <- rbind(NEE, ENLAY, IBDSF)
financials <- rbind(JPM, BRKA, V)
communication_discretionary <- rbind(AMZN, TSLA, BABA)
consumer_staple <- rbind(WMT, NSRGF, PG)
tech <- rbind(AAPL,MSFT,NVDA)
industrials <- rbind(HON,UNP,UPS)
health <- rbind(JNJ,UNH,RHHBY)
sp <- rbind(communication_services, utilities, financials, communication_discretionary, consumer_staple,
tech, industrials, health, real_estate, material, energy, SPY)
return_type <- sp %>%
filter(Sector!="All field") %>%
select(-Close, -Volume) %>%
group_by(Date, Sector) %>%
mutate(avg_return = mean(return),
type_return = ifelse(avg_return<0, "negative", "positive"))%>%
ungroup() %>%
group_by(Sector) %>%
mutate(`Proportion negative` = mean(type_return == "negative"),
`Proportion positive` = mean(type_return == "positive")) %>%
pivot_longer(cols = `Proportion negative`:`Proportion positive`,
names_to = "prop_type",
values_to = "prop") %>%
summarize(Sector, prop_type, prop) %>%
distinct(Sector, prop_type, prop)
## `summarise()` regrouping output by 'Sector' (override with `.groups` argument)
return_type %>%
ggplot(aes(x = prop, y = prop_type, fill = prop_type )) +
geom_col()+
geom_text(aes(label = round(prop,digits = 2)), size = 2)+
facet_wrap(~Sector)+
labs(title = "Proportions of negative and positive daily average returns per sector",
fill = "Type of return",
x = "",
y = "")+
theme(legend.position = 0,
panel.grid.major.y = element_blank(),
panel.grid.minor.y=element_blank())
This graph compares the proportion of time in which the average of the cumulative return of the best stocks per sector have been over or under the base price. We used daily observations. We found the daily cumulative return relative to the first closing price. Then we averaged the daily cumulative return across sectors. The graph distinguishes between sectors, showing that Communication Discretionary was the sector with the highest proportion of positive returns relative to the negative returns. This meaning, that 97% of the time the Communication Discretionary secto had positive returns relative to the initial price. Had a person bought shares of the companies in this sector in January 2nd, they would have slept peacefully 97% of the time. On the other hand, the Energy sector was the sector with the highest proportion of negative returns relative to positive returns. There are multiple reasons for the high positive returns in the technology sector, including the fact that Apple released a new Iphone model in October, the strong predictions for earnings and sales and the reported statistics exceeding analysts expectations previously for the three companies the year before. This proves the point that positive returns on Technology were the main driver for positive returns in the market indexes.
volume <- sp %>%
filter(Sector!="All field") %>%
select(-Close, -return) %>%
mutate(month = month(Date,label = TRUE),
year = year(Date)) %>%
group_by(Sector, month, year) %>%
summarize(`Total volume` = sum(Volume)) %>%
ungroup() %>%
mutate(date = ymd(paste(year,month,"01"))) %>%
mutate(Sector = fct_reorder(Sector, `Total volume`, median, .desc = TRUE)) %>%
ggplot(aes(x = date, y = `Total volume`, fill = Sector))+
geom_col()+
labs(title = "Monthly amount of transactions per sector last year",
y = "",
x = "")+
scale_x_date(date_labels = "%b", date_breaks = "3 months")+
scale_y_continuous(labels = scales::comma)+
theme(legend.position = "none",
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y =element_blank())+
facet_wrap(~Sector)
## `summarise()` regrouping output by 'Sector', 'month' (override with `.groups` argument)
ggplotly(volume,
tooltip = c("y"))
real_estate %>%
ggplot(aes(x = Date, y = Close, color = Company)) +
geom_line()
tech %>%
ggplot(aes(x = Date, y = Close, color = Company)) +
geom_line()
The bar graph shows the sum of the daily transactions for the three biggest companies in each sector. We can see that the sector with the highest volume is Technology, and the sector witht he lowest volume is the Real State sector. The high volumes in the Technology sector potentially reflect that investors preffered large technology companies over the pandemic. This companies have large market caps, and given that the 5 largest companies in the Technology sector account for 17.5% of the S&P, these “blue chip” companies are expected to have larger volumes. One of the reasons is their liquidity. They are very liquid because many people are trading them;therefore, you can find buyers and sellers quickly. As a result there are not price distortions as a consequence of delays in transactions. On the other hand, this could be just a continuation of past trends, given that the technology sector has been the most popular in the last years. Another potential explanation is that Technology stocks are always more risky given the larger returns; therefore, there are major price swings that cause people to increase their transactions. Also, larger volumes reflect the strenght of price changes during the pandemic. In the case of Real State, low volumes reflect the lack of strenght in price changes. As we can see in the graph for Real State companies, the trend is flat, proving that there weren’t significant changes in price. Therefore,there weren’t changes in the demand or supply of Real State stock.
We can also observe that in periods with low prices, the volume tends to increase. In this case, it was in March given the declaration of sanitary emergency and the first COVID-19 cases in the country.